2023/12/23 · 1031 字符
数据爬取
requests
import requests
# Fetch the page and print its body decoded as UTF-8.
# The response is bound to `response` rather than `str` so the builtin
# `str` type is not shadowed for the rest of the script.
# A timeout is passed explicitly: without one, requests will wait
# indefinitely on a server that never answers.
response = requests.get('http://bozai.tech/', timeout=10)
response.encoding = 'utf-8'  # encoding used when decoding response.text
print(response.text)
# Other parts of the response worth inspecting:
#   response.status_code         -> HTTP status code, e.g. 200
#   response.encoding            -> encoding used for .text, e.g. utf-8
#   response.content             -> raw response body as bytes
#   response.json()              -> body parsed as JSON (a method; call it)
#   response.raise_for_status()  -> raises requests.HTTPError on 4xx/5xx
模拟浏览器
import requests
# Same request as a plain GET, but with a browser-style User-Agent header
# so the request identifies itself as a desktop Chrome browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
}
# Bound to `response` (not `str`) to avoid shadowing the builtin; the
# explicit timeout keeps the script from hanging on an unresponsive server.
response = requests.get('http://bozai.tech/', headers=headers, timeout=10)
response.encoding = 'utf-8'  # encoding used when decoding response.text
print(response.text)
标签检索
import requests
from bs4 import BeautifulSoup
# Download the page and print every <img> tag found inside its <body>.
# The explicit timeout keeps the script from hanging on an unresponsive server.
data = requests.get('http://jzfyjnkj.com/', timeout=10)
data.encoding = 'utf-8'  # encoding used when decoding data.text
html = data.text
# The "html5lib" parser needs the separate html5lib package to be installed.
soup = BeautifulSoup(html, features="html5lib")
# find_all returns a list of matching tags (empty when there are none).
print(soup.body.find_all('img'))